The data used in this notebook comes from the COVID-19 Data Repository maintained by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University. The R package used to retrieve the data can be found here.
Accessed dataset on: 2020-08-11
# Load the `coronavirus` dataset and preview its first rows.
data("coronavirus")
head(coronavirus)

# Relabel "US" as "United States" and turn blank province strings into NA.
coronavirus <- coronavirus %>%
  mutate(
    country = replace(country, country == "US", "United States"),
    province = replace(province, province == "", NA)
  )
# Population data: the 2020 column of the wpp2019 `pop` table.
library(wpp2019)
data(pop)

# Keep only the country name and its 2020 figure.
keeps <- c("name", "2020")
pop_2020 <- pop[keeps]
names(pop_2020) <- c("name", "population")

# Map the names used in `pop` onto the names used in `coronavirus`,
# so the two tables can be joined on country.
country_renames <- c(
  "United States of America" = "United States",
  "Iran (Islamic Republic of)" = "Iran",
  "Russian Federation" = "Russia",
  "Bolivia (Plurinational State of)" = "Bolivia",
  "Republic of Moldova" = "Moldova",
  "Venezuela (Bolivarian Republic of)" = "Venezuela"
)
for (un_name in names(country_renames)) {
  pop_2020$name[pop_2020$name == un_name] <- country_renames[[un_name]]
}

# Scale the figures by 1000
# (presumably wpp2019 reports population in thousands -- confirm in its docs).
pop_2020$population <- pop_2020$population * 1000
# Attach each country's 2020 population to every case record.
# Countries whose name has no match in `pop_2020` get an NA population.
cases_pc_df <- left_join(coronavirus, pop_2020, by = c("country" = "name"))

# Total confirmed cases per location (country / province / coordinates),
# largest first. `population` is a grouping key only so that it is carried
# through the aggregation. This table feeds the top-10 bar chart and the
# confirmed-cases map further down.
world_confirmed_cases_df <- cases_pc_df %>%
  filter(type == "confirmed") %>%
  group_by(country, long, lat, population, province) %>%
  summarise(total_cases = sum(cases), .groups = "drop") %>%
  arrange(desc(total_cases))
`summarise()` regrouping output by 'country', 'long', 'lat', 'population' (override with `.groups` argument)
# Top 10 countries by confirmed cases.
# `world_confirmed_cases_df` has one row per location (country + province +
# coordinates), so the totals are first collapsed to one row per country;
# otherwise the "top 10" could contain several provinces of the same country.
top_10_confirmed_df <- world_confirmed_cases_df %>%
  group_by(country) %>%
  summarise(total_cases = sum(total_cases), .groups = "drop") %>%
  arrange(desc(total_cases)) %>%
  head(10)

# Bar chart: countries ordered by their total, bars shaded by the same value.
# The fill scale is centred on the median of the ten plotted totals.
world_confirmed_graph <-
  ggplot(
    data = top_10_confirmed_df,
    aes(x = reorder(country, total_cases), y = total_cases)
  ) +
  geom_col(aes(fill = total_cases)) +
  scale_fill_gradient2(
    low = "thistle1",
    high = "mediumorchid1",
    midpoint = median(top_10_confirmed_df$total_cases)
  ) +
  labs(
    x = "Country",
    y = "Total Confirmed Cases",
    title = "Top 10 Countries With Highest Total Confirmed Cases"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

world_confirmed_graph
# Total deaths per location (country / province / coordinates), largest first.
# `.groups = "drop"` returns an ungrouped table so later steps are not
# silently affected by leftover grouping.
world_death_cases_df <- coronavirus %>%
  filter(type == "death") %>%
  group_by(country, long, lat, province) %>%
  summarise(total_deaths = sum(cases), .groups = "drop") %>%
  arrange(desc(total_deaths))
`summarise()` regrouping output by 'country', 'long', 'lat' (override with `.groups` argument)
# Top 10 countries by deaths.
# `world_death_cases_df` has one row per location (country + province +
# coordinates), so the totals are first collapsed to one row per country;
# otherwise the "top 10" could contain several provinces of the same country.
top_10_death_df <- world_death_cases_df %>%
  group_by(country) %>%
  summarise(total_deaths = sum(total_deaths), .groups = "drop") %>%
  arrange(desc(total_deaths)) %>%
  head(10)

# Bar chart: countries ordered by their total, bars shaded by the same value.
# The fill scale is centred on the median of the ten plotted totals.
world_death_graph <-
  ggplot(
    data = top_10_death_df,
    aes(x = reorder(country, total_deaths), y = total_deaths)
  ) +
  geom_col(aes(fill = total_deaths)) +
  scale_fill_gradient2(
    low = "red",
    high = "red4",
    midpoint = median(top_10_death_df$total_deaths)
  ) +
  labs(
    x = "Country",
    y = "Total Death Cases",
    title = "Top 10 Countries With Highest Death Cases"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

world_death_graph
# Total recovered cases per location (country / province / coordinates),
# largest first. `.groups = "drop"` returns an ungrouped table so later steps
# are not silently affected by leftover grouping.
world_recovered_cases_df <- coronavirus %>%
  filter(type == "recovered") %>%
  group_by(country, long, lat, province) %>%
  summarise(total_recovered = sum(cases), .groups = "drop") %>%
  arrange(desc(total_recovered))
`summarise()` regrouping output by 'country', 'long', 'lat' (override with `.groups` argument)
# Top 10 countries by recovered cases.
# `world_recovered_cases_df` has one row per location (country + province +
# coordinates), so the totals are first collapsed to one row per country;
# otherwise the "top 10" could contain several provinces of the same country.
top_10_recovered_df <- world_recovered_cases_df %>%
  group_by(country) %>%
  summarise(total_recovered = sum(total_recovered), .groups = "drop") %>%
  arrange(desc(total_recovered)) %>%
  head(10)

# Bar chart: countries ordered by their total, bars shaded by the same value.
# The fill scale is centred on the median of the ten plotted totals.
world_recover_graph <-
  ggplot(
    data = top_10_recovered_df,
    aes(x = reorder(country, total_recovered), y = total_recovered)
  ) +
  geom_col(aes(fill = total_recovered)) +
  scale_fill_gradient2(
    low = "green",
    high = "green4",
    midpoint = median(top_10_recovered_df$total_recovered)
  ) +
  labs(
    x = "Country",
    y = "Total Recovered Cases",
    title = "Top 10 Countries With Highest Total Recovered Cases"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

world_recover_graph
# Worldwide confirmed cases summed per date, sorted so the dates with the
# largest sums come first.
world_cases_by_date_df <- coronavirus %>%
  filter(type %in% "confirmed") %>%
  group_by(date) %>%
  summarise(total_cases = sum(cases)) %>%
  arrange(desc(total_cases))
`summarise()` ungrouping output (override with `.groups` argument)
# `total_cases` is the sum of `cases` for a single date, so it has to be
# accumulated in date order before it can be plotted as a cumulative curve.
world_cumulative_cases_df <- world_cases_by_date_df %>%
  arrange(date) %>%
  mutate(cumulative_cases = cumsum(total_cases))

# Line plot of the running worldwide total, with one x-axis tick per month.
ggplot(world_cumulative_cases_df, aes(date, cumulative_cases)) +
  geom_line() +
  labs(
    x = "Month",
    y = "Cases",
    title = "Cumulative Confirmed Cases Worldwide"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_x_date(labels = date_format("%b"), date_breaks = "1 month")
# Countries to include in the per-capita comparison
country_list <- c(
  "United States", "Brazil", "India", "Russia", "Mexico", "China", "Canada"
)

# Confirmed cases per date and country for the selected countries, largest
# first. `population` is a grouping key only so that it is carried through the
# aggregation; `.groups = "drop"` returns an ungrouped table.
top_country_df <- cases_pc_df %>%
  filter(country %in% country_list, type == "confirmed") %>%
  group_by(date, country, population) %>%
  summarise(total_cases = sum(cases), .groups = "drop") %>%
  arrange(desc(total_cases))
`summarise()` regrouping output by 'date', 'country' (override with `.groups` argument)
# Scale each per-date count to cases per million inhabitants. The column name
# doubles as the tooltip label in the interactive plot, so it states what the
# value actually is.
top_country_df <- top_country_df %>%
  ungroup() %>%
  mutate(`Cases per Million` = total_cases * 1000000 / population)

# Line plot of per-million confirmed cases vs. date, one coloured line per
# country, with one x-axis tick per month.
g <- ggplot(
  data = top_country_df,
  aes(
    x = date,
    y = `Cases per Million`,
    color = country,
    group = country
  )
) +
  geom_line() +
  labs(
    x = "Month",
    y = " Daily Confirmed Cases per Million People",
    title = "Daily Confirmed Cases by Country (2020)"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_x_date(labels = date_format("%b"), date_breaks = "1 month")

# Convert to an interactive plotly figure; the tooltip shows the date, the
# per-million value and the country (via the `group` aesthetic).
ggly <- ggplotly(
  p = g,
  width = 1000,
  height = 700,
  tooltip = c("date", "Cases per Million", "group")
)
`group_by_()` is deprecated as of dplyr 0.7.0.
Please use `group_by()` instead.
See vignette('programming') for more help
This warning is displayed once every 8 hours.
Call `lifecycle::last_warnings()` to see where this warning was generated.
# Display the interactive per-country line plot built by ggplotly() above.
ggly
# Load packages and world map data
library(sf)
library(tmap)
library(spData)
library(viridis)
library(rnaturalearth)
# World polygons used as the grey background of the maps.
world <- map_data("world")

# Legend breaks and the range label shown for each break.
breaks <- c(1, 30, 100, 1000, 50000, 100000)
labels <- c(
  "1-29", "30-99", "100-999", "1,000-49,999", "50,000-99,999", "100,000+"
)

# The size and colour scales are log-transformed, which is undefined for zero
# or negative totals, so only strictly positive totals are plotted.
confirmed_points_df <- world_confirmed_cases_df %>%
  filter(total_cases > 0)

# Bubble map: one point per location, sized and coloured by its total.
# `text_country`, `text_province` and `text` are not ggplot2 aesthetics (ggplot2
# warns that it ignores them); they exist only to feed the ggplotly() tooltip.
confirm_map <- ggplot() +
  geom_polygon(
    data = world,
    aes(x = long, y = lat, group = group),
    fill = "grey", alpha = 0.3
  ) +
  geom_point(
    data = confirmed_points_df,
    aes(
      x = long,
      y = lat,
      size = total_cases,
      color = total_cases,
      text_country = country,
      text_province = province,
      text = paste("Confirmed: ", total_cases)
    ),
    alpha = 0.5
  ) +
  scale_size_continuous(
    name = "Confirmed cases", trans = "log", range = c(1, 8),
    breaks = breaks, labels = labels
  ) +
  scale_colour_viridis_c(
    option = "plasma",
    direction = -1,
    name = "Confirmed cases",
    trans = "log",
    breaks = breaks,
    labels = labels
  ) +
  # Same name/breaks/labels on both scales plus a legend-type colour guide,
  # so size and colour share one legend.
  guides(colour = guide_legend()) +
  theme_void() +
  labs(title = "Map of Confirmed Cases") +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5)
  )
Ignoring unknown aesthetics: text_country, text_province, text
# Interactive version of the confirmed-cases map; the tooltip shows only the
# three custom text aesthetics attached to the points.
confirm_map_plotly <- ggplotly(
  confirm_map,
  tooltip = c("text_country", "text_province", "text"),
  width = 1000,
  height = 700
)
Transformation introduced infinite values in discrete y-axis
Transformation introduced infinite values in discrete y-axis
NaNs produced
# Display the interactive confirmed-cases map.
confirm_map_plotly
# Legend breaks and the range label shown for each break.
breaks <- c(1, 30, 100, 1000, 50000, 100000)
labels <- c(
  "1-29", "30-99", "100-999", "1,000-49,999", "50,000-99,999", "100,000+"
)

# The size and colour scales are log-transformed, which is undefined for zero
# or negative totals, so only strictly positive totals are plotted.
death_points_df <- world_death_cases_df %>%
  filter(total_deaths > 0)

# Bubble map: one point per location, sized and coloured by its total.
# `text_country`, `text_province` and `text` are not ggplot2 aesthetics (ggplot2
# warns that it ignores them); they exist only to feed the ggplotly() tooltip.
death_map <- ggplot() +
  geom_polygon(
    data = world,
    aes(x = long, y = lat, group = group),
    fill = "grey", alpha = 0.3
  ) +
  geom_point(
    data = death_points_df,
    aes(
      x = long,
      y = lat,
      size = total_deaths,
      color = total_deaths,
      text_country = country,
      text_province = province,
      text = paste("Deaths: ", total_deaths)
    ),
    alpha = 0.5
  ) +
  scale_size_continuous(
    name = "Death cases", trans = "log", range = c(1, 8),
    breaks = breaks, labels = labels
  ) +
  scale_colour_viridis_c(
    option = "inferno",
    direction = -1,
    name = "Death cases",
    trans = "log",
    breaks = breaks,
    labels = labels
  ) +
  # Same name/breaks/labels on both scales plus a legend-type colour guide,
  # so size and colour share one legend.
  guides(colour = guide_legend()) +
  theme_void() +
  labs(title = "Map of Death Cases") +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5)
  )
Ignoring unknown aesthetics: text_country, text_province, text
# Interactive version of the death map; the tooltip shows only the three
# custom text aesthetics attached to the points.
death_map_plotly <- ggplotly(
  death_map,
  tooltip = c("text_country", "text_province", "text"),
  width = 1000,
  height = 700
)
Transformation introduced infinite values in discrete y-axis
Transformation introduced infinite values in discrete y-axis
NaNs produced
# Display the interactive death map.
death_map_plotly
NA